

import os
import fitz  # PyMuPDF
import pdfplumber
from collections import OrderedDict
from pprint import pprint
from utils.functions import remove_extract_find_tables


def get_forms_list(find_tables, page_num, output_path, file_name):
    """Describe each detected table by its vertical extent and image path.

    Args:
        find_tables: Iterable of table objects, each exposing a ``bbox``
            attribute laid out as ``(x0, top, x1, bottom)``.
        page_num: Page identifier embedded in the image file name.
        output_path: Root output directory.
        file_name: Document name; used both as a sub-directory of
            ``output_path`` and as part of the image file name.

    Returns:
        A list with one dict per table, in input order, holding the keys
        ``'顶部'`` (top coordinate of the table), ``'底部'`` (bottom
        coordinate) and ``'路径'`` (image path with forward slashes as
        separators). This function only computes the path; it does not
        write any image file itself.
    """
    forms_list = []
    for table_num, table in enumerate(find_tables):
        # Bounding box of the whole table: (x0, top, x1, bottom).
        # Only the vertical extent is kept.
        _, table_top, _, table_bottom = table.bbox

        images_path = os.path.join(
            output_path,
            file_name,
            'tables',
            f'table_{file_name}_{page_num}_{table_num}.jpg',
        )

        forms_list.append({
            '顶部': table_top,
            '底部': table_bottom,
            # Normalise to forward slashes so the stored path looks the
            # same regardless of the platform's separator.
            '路径': images_path.replace(os.sep, '/'),
        })

    return forms_list